# Direct outputs eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
  scipen = 999,
  digits = 16,
  max.print = .Machine$integer.max,
  show.error.locations = TRUE,
  warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(ggplot2)
library(scales)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1L)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

outcome <- "db_w2_wages"
cbb_palette <-
    c(
        "#000000",
        "#E69F00",
        "#56B4E9",
        "#009E73",
        "#F0E442",
        "#0072B2",
        "#D55E00",
        "#CC79A7"
    )


# Read in all estimates

did_laterwinners_coefs <-
    readRDS(
        sprintf("~/estimation-output/event_study_estimates_%s.rds", outcome)
    )

did_laterwinners_coefs <- setDT(did_laterwinners_coefs[[1]])

did_laterwinners_coefs <-
    did_laterwinners_coefs[
        between(ref_event_time, -7L, 5L, incbounds = TRUE) &
        ref_onset_time == "Cohort-Weighted" &
        model == "reduced_form" &
        rn == "att",
        .(
            ref_event_time,
            estimate,
            cluster_se
        )
    ]

# Introduce a omitted_event_time row
did_laterwinners_coefs <-
    rbindlist(
        list(
            did_laterwinners_coefs,
            data.table(
                ref_event_time = -2L,
                estimate = 0,
                cluster_se = 0
            )
        ),
        use.names = TRUE
    )
setorderv(did_laterwinners_coefs, "ref_event_time")

# Winsorize top 0.1%
did_laterwinners_coefs_winsorize_top01 <-
    readRDS(
        sprintf(
            "~/estimation-output/event_study_estimates_%s_winsorize_top01.rds",
            outcome
        )
    )

did_laterwinners_coefs_winsorize_top01 <-
    setDT(did_laterwinners_coefs_winsorize_top01[[1]])

did_laterwinners_coefs_winsorize_top01 <-
    did_laterwinners_coefs_winsorize_top01[
        between(ref_event_time, -7L, 5L, incbounds = TRUE) &
        ref_onset_time == "Cohort-Weighted" &
        model == "reduced_form" &
        rn == "att",
        .(
            ref_event_time,
            estimate,
            cluster_se
        )
    ]

# Introduce a omitted_event_time row
did_laterwinners_coefs_winsorize_top01 <-
    rbindlist(
        list(
            did_laterwinners_coefs_winsorize_top01,
            data.table(
                ref_event_time = -2L,
                estimate = 0,
                cluster_se = 0
            )
        ),
        use.names = TRUE
    )
did_laterwinners_coefs_winsorize_top01[, cluster_se := 0] # won't use in figure
setorderv(did_laterwinners_coefs_winsorize_top01, "ref_event_time")

# Winsorize top 0.5%
did_laterwinners_coefs_winsorize_top05 <-
    readRDS(
        sprintf(
            "~/estimation-output/event_study_estimates_%s_winsorize_top05.rds",
            outcome
        )
    )

did_laterwinners_coefs_winsorize_top05 <-
    setDT(did_laterwinners_coefs_winsorize_top05[[1]])

did_laterwinners_coefs_winsorize_top05 <-
    did_laterwinners_coefs_winsorize_top05[
        between(ref_event_time, -7L, 5L, incbounds = TRUE) &
        ref_onset_time == "Cohort-Weighted" &
        model == "reduced_form" &
        rn == "att",
        .(
            ref_event_time,
            estimate,
            cluster_se
        )
    ]

# Introduce a omitted_event_time row
did_laterwinners_coefs_winsorize_top05 <-
    rbindlist(
        list(
            did_laterwinners_coefs_winsorize_top05,
            data.table(
                ref_event_time = -2L,
                estimate = 0,
                cluster_se = 0
            )
        ),
        use.names = TRUE
    )
did_laterwinners_coefs_winsorize_top05[, cluster_se := 0] # won't use in figure
setorderv(did_laterwinners_coefs_winsorize_top05, "ref_event_time")

# Winsorize top 1%
did_laterwinners_coefs_winsorize_top1 <-
    readRDS(
        sprintf(
            "~/estimation-output/event_study_estimates_%s_winsorize_top1.rds",
            outcome
        )
    )

did_laterwinners_coefs_winsorize_top1 <-
    setDT(did_laterwinners_coefs_winsorize_top1[[1]])

did_laterwinners_coefs_winsorize_top1 <-
    did_laterwinners_coefs_winsorize_top1[
        between(ref_event_time, -7L, 5L, incbounds = TRUE) &
        ref_onset_time == "Cohort-Weighted" &
        model == "reduced_form" &
        rn == "att",
        .(
            ref_event_time,
            estimate,
            cluster_se
        )
    ]

# Introduce a omitted_event_time row
did_laterwinners_coefs_winsorize_top1 <-
    rbindlist(
        list(
            did_laterwinners_coefs_winsorize_top1,
            data.table(
                ref_event_time = -2L,
                estimate = 0,
                cluster_se = 0
            )
        ),
        use.names = TRUE
    )
did_laterwinners_coefs_winsorize_top1[, cluster_se := 0] # won't use in figure
setorderv(did_laterwinners_coefs_winsorize_top1, "ref_event_time")

# Winsorize top 2%
did_laterwinners_coefs_winsorize_top2 <-
    readRDS(
        sprintf(
            "~/estimation-output/event_study_estimates_%s_winsorize_top2.rds",
            outcome
        )
    )

did_laterwinners_coefs_winsorize_top2 <-
    setDT(did_laterwinners_coefs_winsorize_top2[[1]])

did_laterwinners_coefs_winsorize_top2 <-
    did_laterwinners_coefs_winsorize_top2[
        between(ref_event_time, -7L, 5L, incbounds = TRUE) &
        ref_onset_time == "Cohort-Weighted" &
        model == "reduced_form" &
        rn == "att",
        .(
            ref_event_time,
            estimate,
            cluster_se
        )
    ]

# Introduce a omitted_event_time row
did_laterwinners_coefs_winsorize_top2 <-
    rbindlist(
        list(
            did_laterwinners_coefs_winsorize_top2,
            data.table(
                ref_event_time = -2L,
                estimate = 0,
                cluster_se = 0
            )
        ),
        use.names = TRUE
    )
did_laterwinners_coefs_winsorize_top2[, cluster_se := 0] # won't use in figure
setorderv(did_laterwinners_coefs_winsorize_top2, "ref_event_time")

# Label and combine estimates

did_laterwinners_coefs[, estimate_label := "Unadjusted"]
did_laterwinners_coefs_winsorize_top01[
    ,
    estimate_label := "Winsorized (Top 0.1%)"
]
did_laterwinners_coefs_winsorize_top05[
    ,
    estimate_label := "Winsorized (Top 0.5%)"
]
did_laterwinners_coefs_winsorize_top1[
    ,
    estimate_label := "Winsorized (Top 1%)"
]
did_laterwinners_coefs_winsorize_top2[
    ,
    estimate_label := "Winsorized (Top 2%)"
]

all_specs <-
    rbindlist(
        list(
            did_laterwinners_coefs,
            did_laterwinners_coefs_winsorize_top01,
            did_laterwinners_coefs_winsorize_top05,
            did_laterwinners_coefs_winsorize_top1,
            did_laterwinners_coefs_winsorize_top2
        ),
        use.names = TRUE
    )

all_specs[
    ,
    estimate_label :=
        factor(
            estimate_label,
            levels = c(
                "Unadjusted",
                "Winsorized (Top 0.1%)",
                "Winsorized (Top 0.5%)",
                "Winsorized (Top 1%)",
                "Winsorized (Top 2%)")
        )
]

setnames(all_specs, "ref_event_time", "event_time")

# Produce Appendix Figure B.1 - unadjusted and winsorized event study estimates

figure_b_1 <-
    ggplot(
        aes(x = event_time, y = estimate, color = factor(estimate_label)),
        data = all_specs[between(event_time, -7L, 5L, incbounds = TRUE)]
    ) +
    geom_line() +
    geom_ribbon(
        aes(
            ymin = estimate - (1.64 * cluster_se),
            ymax = estimate + (1.64 * cluster_se)
        ),
        linetype = 0,
        alpha = 0.2,
        show.legend = FALSE
    ) +
    theme_bw(base_size = 13) +
    theme(panel.grid.minor = element_blank()) +
    scale_x_continuous(
        breaks = seq.int(from = -7L, to = 5L, by = 1L),
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(breaks = breaks_extended(n = 10)) +
    labs(x = "Event Time (ℓ)", y = "Event Study Estimate (2016 USD)") +
    theme(
        legend.title = element_blank(),
        legend.position = c(0.15, 0.0775),
        legend.background = element_rect(fill = "white", color = "grey"),
        strip.text.x = element_blank(),
        strip.background = element_blank(),
        legend.key = element_rect(NA),
        legend.spacing.y = unit(2, "mm"),
        legend.key.height = unit(4, "mm"),
        legend.key.width = unit(4, "mm"),
        legend.margin = margin(t = 0, r = 0.1, b = 0.15, l = 0.1, unit = "cm")
    ) +
    scale_color_manual(vales = cbb_palette) +
    guides(color = guide_legend(ncol = 1, byrow = TRUE)) +
    coord_cartesian(ylim = c(-5000, 500))

ggsave(
    plot = figure_b_1,
    filename = "~/paper/figures/figure-B.1.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_b_1,
    filename = "~/paper/figures/figure-B.1.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

# Housekeeping
outcome <- NULL
cbb_palette <- NULL
did_laterwinners_coefs <- NULL
did_laterwinners_coefs_winsorize_top01 <- NULL
did_laterwinners_coefs_winsorize_top05 <- NULL
did_laterwinners_coefs_winsorize_top1 <- NULL
did_laterwinners_coefs_winsorize_top2 <- NULL
all_specs <- NULL
figure_b_3 <- NULL
rm(
    outcome,
    cbb_palette,
    did_laterwinners_coefs,
    did_laterwinners_coefs_winsorize_top01,
    did_laterwinners_coefs_winsorize_top05,
    did_laterwinners_coefs_winsorize_top1,
    did_laterwinners_coefs_winsorize_top2,
    all_specs,
    figure_b_3
)